library(Momocs)

a=read.table(file.choose(), header=T) # select GMM-TM
names(a)
head(a)
str(a)

create_carnivore_dataframes <- function(data) {
  # Get unique values in the "spc" variable
  spc_values <- unique(data$spc)
  
  # Initialize a list to store data frames
  carnivore_dataframes <- list()
  
  # Iterate through each unique value in "spc"
  for (spc_value in spc_values) {
    # Get the subset of data for the current "spc" value
    subset_data <- subset(data, spc == spc_value)
    
    # Extract the x and y coordinates
    x_coords <- subset_data$x
    y_coords <- subset_data$y
    
    # Create a data frame with the coordinates
    coordinates_df <- data.frame(x = x_coords, y = y_coords)
    
    # Label the data frame with the "spc" value
    dataframe_name <- spc_value
    
    # Store the data frame in the list
    carnivore_dataframes[[dataframe_name]] <- coordinates_df
  }
  
  # Return the list of data frames
  return(carnivore_dataframes)
}

# Usage:
carnivore_dataframes <- create_carnivore_dataframes(a)



#-------------------------------------------------------------------------------
# Accessing the first data frame
first_carnivore_dataframe <- carnivore_dataframes[[1]]

# Viewing the first data frame
print(first_carnivore_dataframe)


# List all the names of the data frames
all_dataframe_names <- names(carnivore_dataframes)

# Print the list of names
print(all_dataframe_names)


#-------------------------------------------------------------------------------
#create the four carnivore groups

# Initialize lists to store data frames for each group
crocs <- list()
hyenas <- list()
leopards <- list()
lions <- list()

# Iterate through each data frame in carnivore_dataframes
for (df_name in names(carnivore_dataframes)) {
  # Get the last character of the data frame name
  last_char <- substr(df_name, nchar(df_name), nchar(df_name))
  
  # Check if the last character of the data frame name is "c", "a", "d", or "n"
  if (last_char == "c") {
    crocs[[df_name]] <- carnivore_dataframes[[df_name]]
  } else if (last_char == "a") {
    hyenas[[df_name]] <- carnivore_dataframes[[df_name]]
  } else if (last_char == "d") {
    leopards[[df_name]] <- carnivore_dataframes[[df_name]]
  } else if (last_char == "n") {
    lions[[df_name]] <- carnivore_dataframes[[df_name]]
  }
}



print(length(crocs))
print(length(hyenas))
print(length(leopards))
print(length(lions))


#-----------------------------------------------------------------------
#compose momocs object

#analysis by individual carnivore type separately

A<-rep("crocodile",52)
B<-rep("leopard",72)
C<-rep("lion", 73)
D<-rep("hyena", 87)

cro<-paste0("croc", 1:52)
leop<-paste0("leop", 1:72)
lio<-paste0("lion", 1:73)
hyen<-paste0("hyena", 1:87)


#crocs

fac3<-data.frame(name=c(cro),
                 taxon=c(A))#these are the categorical variables that accompany coordinate 
TMcroc<-Out(crocs,fac3)
panel(TMcroc, cols="grey90")#lightblue

TMcroc%>%coo_center%>%coo_scale%>%coo_alignxax()%>%coo_alignminradius%>%coo_slidedirection("up")%T>%
  print()%>%stack()


#leopards:
fac4<-data.frame(name=c(leop),
                 taxon=c(B))#these are the categorical variables that accompany coordinate 
TMleop<-Out(leopards,fac4)
panel(TMleop, cols="grey90")#lightblue
TMleop%>%coo_center%>%coo_scale%>%coo_alignxax()%>%coo_alignminradius%>%coo_slidedirection("up")%T>%
  print()%>%stack()


#lions.
fac5<-data.frame(name=c(lio),
                 taxon=c(C))#these are the categorical variables that accompany coordinate 
TMlion<-Out(lions,fac5)
panel(TMlion, cols="grey90")#lightblue
TMlion%>%coo_center%>%coo_scale%>%coo_alignxax()%>%coo_alignminradius%>%coo_slidedirection("up")%T>%
  print()%>%stack()


#hyenas
fac6<-data.frame(name=c(hyen),
                 taxon=c(D))#these are the categorical variables that accompany coordinate 
TMhyena<-Out(hyenas,fac6)
panel(TMhyena, cols="grey90")#lightblue
TMhyena%>%coo_center%>%coo_scale%>%coo_alignxax()%>%coo_alignminradius%>%coo_slidedirection("up")%T>%
  print()%>%stack()

#--------------------------------------------------------------------------------------
#all carnivores together

fac<-data.frame(name=c(cro,leop,lio,hyen),
                taxon=c(A,B,C,D))#these are the categorical variables that accompany coordinate 
TMall<-Out(carnivore_dataframes,fac)
TMall%>%coo_center%>%coo_scale%>%coo_alignxax()%>%coo_alignminradius%>%coo_slidedirection("up")%T>%
  print()%>%stack()

#panel(TMall, cols=col.spring(4)[TMall@fac[,"taxon"]])
library(RColorBrewer)
TMall$fac$taxon <- as.factor(TMall$fac$taxon)# MAKE SURE THAT "TAXON" IS A FACTOR. If so, undo line 248
color_palette <- brewer.pal(4, "Set1")
colors <- color_palette[as.numeric(TMall$fac$taxon)]
panel(TMall, cols = colors)
#--------------------------------------------------------------------------------------

#estimate number of harmonics:

calibrate_harmonicpower_efourier(TMall, id=1:length(TMall), nb.h=25, drop=1,thresh=c(90,95,99,99.9),plot=TRUE)
#nb.h must be half or lower than half the number of points used per outline

calibrate_reconstructions_efourier(TMall,id=24, range=1:9)

#apply harmonics on Fourier coefficients (which determine shape):
tm.f <- efourier(TMall, nb.h=16)#harmonics
boxplot(tm.f,drop=1)

# mean shape, per group
tm.ms <- MSHAPES(tm.f, ~taxon)
# lets rebuild an Out
Out(tm.ms$shp) %>% panel(names=TRUE)


#thin-plate splines:

q<-MSHAPES(tm.f,"taxon",nb.pts=80)$shp
Cr<-q$crocodile
Le<-q$leopard
Li<-q$lion
Hy<-q$hyena
tps_grid(Cr,Le,amp=3,grid.size=20)
tps_grid(Li,Hy,amp=3,grid.size=20)


#------------------------------------------------------------------------------

#CLASSIFICATION ANALYSES

#run PCA
tm.pca=PCA(tm.f)
plot_PCA(tm.pca)
plot_PCA(tm.pca, ~taxon)
PCcontrib(tm.pca,nax=1:3)

plot(tm.pca,~taxon,pos.shp="circle",nb.shp=20, conf.ellipses = 0.95)
plot(tm.pca,~taxon,ellipses=TRUE, pch=1,labelsgroups = FALSE)#ellipsesax=TRUE,
plot(tm.pca,~taxon,pos.shp="circle",nb.shp=20,ellipses=TRUE, pch=1,labelsgroups = FALSE, col.shp="lightblue")#ellipsesax=TRUE

plot(tm.pca, fac=taxon, morphospace=TRUE,pos.shp="circle",nb.shp=20,
     ellipses=TRUE, conf.ellipses=0.95,)

plot(tm.pca,~taxon,density=TRUE,contour=TRUE,lev.contour=10, bg="yellow")#contours


#MANOVA
MANOVA(tm.pca,~taxon,test="Hotelling",retain=0.99,drop)#retain= numero de componentes o % varianza
MANOVA_PW(tm.pca,~taxon,retain=0.99)

#MUltidimensional Scaling

mds<-MDS(tm.pca,method="euclidean",k=2)
plot(mds)


#LDA on the PCA scores
tm.lda=LDA(tm.pca,"taxon", retain=0.99)
tm.lda
tm.lda%>%summary
classification_metrics(tm.lda)

plot_LDA(tm.lda)
plot_CV(tm.lda)
classification_metrics(tm.lda)
plot_CV2(tm.lda)



#------------------------------
#------------------------------
#------------------------------
#------------------------------
#------------------------------
#------------------------------

#APPLY MACHINE LEARNING ALGORITHMS
library(caret)


#generation of PCA after train test split


# Convert the 'Coe' object to a data frame
tm.f_df <- as_df(tm.f)

# Ensure the 'taxon' variable is a factor
tm.f_df$taxon <- as.factor(tm.f_df$taxon)

# Create stratified split
set.seed(123)
train_indices <- createDataPartition(tm.f_df$taxon, p = 0.7, list = FALSE)

# Create training and testing sets
tm.f_train <- tm.f_df[train_indices, ]
tm.f_test <- tm.f_df[-train_indices, ]

# Check the distribution of 'taxon' in both sets
print(table(tm.f_train$taxon))
print(table(tm.f_test$taxon))


# Exclude 'taxon' and 'name' variables and ensure all remaining columns are numeric
numeric_columns <- setdiff(names(tm.f_train), c("taxon", "name"))

# Remove columns with zero variance
non_constant_columns <- numeric_columns[sapply(tm.f_train[, numeric_columns], function(x) sd(x, na.rm = TRUE) > 0)]

# Check if there are any non-constant columns remaining
if (length(non_constant_columns) == 0) {
  stop("No numeric columns with non-zero variance available for PCA")
}

# Perform PCA on the training set (excluding 'taxon' and 'name')
pca_train <- prcomp(tm.f_train[, non_constant_columns], center = TRUE, scale. = TRUE)


# Remove constant columns in the testing set
# Ensure the testing set has the same non-constant columns as the training set
tm.f_test_non_constant <- tm.f_test[, non_constant_columns, drop = FALSE]
if (any(!names(tm.f_test_non_constant) %in% non_constant_columns)) {
  stop("Testing set contains columns not found in training set or missing columns")
}

# Project the testing set onto the PCA components obtained from the training set
tm.f_test_pca <- predict(pca_train, newdata = tm.f_test_non_constant)
tm.f_test_pca <-tm.f_test_pca[,-c(64:66)]#remove additional columns

# Combine the PCA results with the 'taxon' and 'name' variables for interpretation
tm.f_train_pca <- data.frame(pca_train$x, taxon = tm.f_train$taxon)
tm.f_test_pca <- data.frame(tm.f_test_pca, taxon = tm.f_test$taxon)

# Inspect the PCA results
print(head(tm.f_train_pca))
print(head(tm.f_test_pca))


ctrl <- trainControl(method = "repeatedcv", repeats=5)#say we want 5 cross-validation repeats
#------------------------------

#linear discriminant analysis
set.seed(7)
ldaFit <- train(x = tm.f_train_pca[,-63],#12 is the position of categorial variable 
                y = tm.f_train_pca$taxon,
                method = "lda",
                metric = "Kappa",
                tuneLength = 10,
                trControl = ctrl)
pred<-predict(ldaFit,tm.f_test_pca[,-63])
confusionMatrix(pred,tm.f_test_pca$taxon)



#random forest
set.seed(7)
rfFit <- train(x = tm.f_train_pca[,-63],#12 is the position of categorial variable 
                y = tm.f_train_pca$taxon,
                method = "rf",
                metric = "Kappa",
                tuneLength = 10,
                trControl = ctrl)
pred<-predict(rfFit,tm.f_test_pca[,-63])
confusionMatrix(pred,tm.f_test_pca$taxon)


#Support Vector Machine
#Radial
set.seed(7)
vmFit <- train(x = tm.f_train_pca[,-63], 
               y = tm.f_train_pca$taxon,
               method = "svmRadial",
               metric = "Kappa",
               tuneLength = 10,
               trControl = ctrl)
pred<-predict(vmFit,tm.f_test_pca[,-63])
confusionMatrix(pred,tm.f_test_pca$taxon)










# Ensure that the columns in the test data match those in the training data
test_features <- tm.f_test_pca[, -which(names(tm.f_test_pca) == "taxon")]
train_features <- tm.f_train_pca[, -which(names(tm.f_train_pca) == "taxon")]

if (!all(names(test_features) %in% names(train_features))) {
  stop("Mismatch between columns in training and testing data")
}

# Predict using the trained SVM model
pred <- tryCatch({
  predict(vmFit, newdata = test_features)
}, error = function(e) {
  message("Error in prediction: ", e$message)
  NA
})

# Handle missing predictions if any
if (any(is.na(pred))) {
  pred[is.na(pred)] <- "Unknown"  # Replace with a default value or handle appropriately
}

# Generate confusion matrix
confusionMatrix(pred, tm.f_test_pca$taxon)